started: Alexey Larionov, 30Jan2017
last updated: Alexey Larionov, 01Mar2017
# Time stamp
Sys.time()
## [1] "2017-03-01 20:51:52 GMT"
# Folders
setwd("/scratch/medgen/scripts/wecare_stat_01.17/scripts")
interim_data_folder <- "/scratch/medgen/scripts/wecare_stat_01.17/interim_data"
# Required libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# --- eigenvectors --- #
load(paste(interim_data_folder, "r05a_calculate_egenvectors_wecare_only_jan2017_explore.RData", sep="/"))
rm(wecare_exac.df, wecare_genotypes.mx, wecare_kgen.df, wecare_variants.df,
common_wecare_exac.df, common_wecare_genotypes.mx, common_wecare_kgen.df, common_wecare_variants.df)
# eigenvectors for all variants
r_evectors.df <- as.data.frame(wecare.eigen$vectors[,1:10])
colnames(r_evectors.df) <- c("r_ev1", "r_ev2", "r_ev3", "r_ev4", "r_ev5", "r_ev6", "r_ev7", "r_ev8", "r_ev9", "r_ev10")
str(r_evectors.df)
## 'data.frame': 480 obs. of 10 variables:
## $ r_ev1 : num 0.00875 0.00976 -0.0044 -0.00764 -0.00231 ...
## $ r_ev2 : num -0.05411 -0.02962 0.00248 0.00195 0.00781 ...
## $ r_ev3 : num 0.01 0.009255 0.000476 0.006906 0.00144 ...
## $ r_ev4 : num 0.02113 0.00507 0.00383 0.00149 -0.00243 ...
## $ r_ev5 : num -0.184189 -0.092437 0.000649 0.003586 0.016437 ...
## $ r_ev6 : num 0.024709 0.026997 0.001998 0.003521 0.000813 ...
## $ r_ev7 : num -0.02331 -0.01648 0.00636 0.00427 0.01293 ...
## $ r_ev8 : num -4.51e-02 3.43e-03 6.41e-03 -9.41e-04 7.40e-06 ...
## $ r_ev9 : num 0.000288 -0.002909 -0.010841 0.008353 0.008717 ...
## $ r_ev10: num 0.02773 -0.0363 0.00986 0.00429 0.00511 ...
# eigenvectors for common variants
common_r_evectors.df <- as.data.frame(common_wecare.eigen$vectors[,1:10])
colnames(common_r_evectors.df) <- c("r_ev1", "r_ev2", "r_ev3", "r_ev4", "r_ev5", "r_ev6", "r_ev7", "r_ev8", "r_ev9", "r_ev10")
str(common_r_evectors.df)
## 'data.frame': 480 obs. of 10 variables:
## $ r_ev1 : num -0.15384 -0.12961 -0.00487 -0.00449 0.01592 ...
## $ r_ev2 : num 0.08499 0.0888 -0.00844 0.00263 -0.02961 ...
## $ r_ev3 : num -0.01933 0.07343 -0.00789 -0.05987 -0.01283 ...
## $ r_ev4 : num -0.00144 0.08025 0.01936 0.03319 -0.00344 ...
## $ r_ev5 : num 0.04342 -0.05186 -0.00722 -0.02284 0.00632 ...
## $ r_ev6 : num -0.000472 -0.014538 0.037114 -0.021706 -0.019502 ...
## $ r_ev7 : num 0.0506 0.041 0.0435 0.031 -0.0518 ...
## $ r_ev8 : num -0.0884 -0.0243 0.0274 0.0436 -0.0137 ...
## $ r_ev9 : num -0.0177 0.0648 -0.0398 -0.0637 0.0136 ...
## $ r_ev10: num -0.05996 -0.00134 -0.01528 -0.011 -0.01631 ...
# Common sence checks (using properties of eigenvectors)
apply(r_evectors.df,2,sum) # should be close to zeros
## r_ev1 r_ev2 r_ev3 r_ev4 r_ev5
## -8.739862e-15 -1.576349e-15 6.239041e-16 1.470693e-15 -4.530556e-15
## r_ev6 r_ev7 r_ev8 r_ev9 r_ev10
## 4.380549e-16 -4.512992e-16 3.768619e-16 -1.612209e-15 -1.892095e-15
apply(r_evectors.df,2,function(x){sum(x^2)}) # should be close to ones
## r_ev1 r_ev2 r_ev3 r_ev4 r_ev5 r_ev6 r_ev7 r_ev8 r_ev9 r_ev10
## 1 1 1 1 1 1 1 1 1 1
# --- eigenvalues --- #
# all variants
r_evalues.df <- as.data.frame(wecare.eigen$values)
colnames(r_evalues.df) <- "evals"
plot(r_evalues.df$evals, main="Eigenvalues (WECARE, all variants, R-script)")
# common variants
common_r_evalues.df <- as.data.frame(common_wecare.eigen$values)
colnames(common_r_evalues.df) <- "evals"
plot(common_r_evalues.df$evals, main="Eigenvalues (WECARE, common variants, R-script)")
# --- eigenvectors --- #
es_evectors_file="/scratch/medgen/scripts/wecare_stat_01.17/interim_data/u01_wecare_all_variants_eigenstrat_no_outliers/u01_wecare_all_variants_eigenstrat_no_outliers.evec"
common_es_evectors_file="/scratch/medgen/scripts/wecare_stat_01.17/interim_data/u01_wecare_common_variants_eigenstrat_no_outliers/u01_wecare_common_variants_eigenstrat_no_outliers.evec"
# eigenvectors for all variants
es_evectors.df <- read.table(es_evectors_file)
common_es_evectors.df <- read.table(common_es_evectors_file)
colnames(es_evectors.df) <- c("wes_id", "es_ev1", "es_ev2", "es_ev3", "es_ev4", "es_ev5", "es_ev6", "es_ev7", "es_ev8", "es_ev9", "es_ev10", "group")
colnames(common_es_evectors.df) <- c("wes_id", "es_ev1", "es_ev2", "es_ev3", "es_ev4", "es_ev5", "es_ev6", "es_ev7", "es_ev8", "es_ev9", "es_ev10", "group")
str(es_evectors.df)
## 'data.frame': 480 obs. of 12 variables:
## $ wes_id : Factor w/ 480 levels "P1_A01","P1_A02",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ es_ev1 : num 0.0157 0.0146 -0.0049 -0.0081 -0.002 -0.0064 -0.0087 -0.011 0.0283 -0.0077 ...
## $ es_ev2 : num -0.0558 -0.0315 0.0022 0.0014 0.0078 0.0066 0.0037 0.0073 -0.0578 0.0053 ...
## $ es_ev3 : num -0.0085 -0.0083 0.0002 -0.0072 -0.0017 -0.0026 -0.0034 -0.0008 -0.006 -0.0066 ...
## $ es_ev4 : num -0.0376 -0.0142 -0.0037 -0.0017 0.0035 0.0047 0.0029 0.0045 -0.0273 0.003 ...
## $ es_ev5 : num -0.1792 -0.0915 0.0005 0.0035 0.0154 ...
## $ es_ev6 : num -0.0214 -0.024 -0.0018 -0.004 -0.0011 0.0056 0.0021 -0.0024 -0.0151 -0.0041 ...
## $ es_ev7 : num 0.0058 0.0172 -0.0024 -0.0043 -0.0132 0.0068 0.0044 -0.0011 0.0261 -0.0072 ...
## $ es_ev8 : num -0.0533 -0.0026 0.0073 0.0009 0.0053 0.0028 -0.0051 -0.0028 -0.028 0.0031 ...
## $ es_ev9 : num -0.0038 0.0026 0.0117 -0.0094 -0.0083 -0.0074 0.0059 -0.006 -0.0276 -0.0017 ...
## $ es_ev10: num 0.0778 -0.0383 0.0068 -0.0006 0.0075 0 0.0016 -0.0029 0.0561 -0.0015 ...
## $ group : Factor w/ 2 levels "CBC","UBC": 1 1 1 1 1 1 2 1 1 2 ...
str(common_es_evectors.df)
## 'data.frame': 480 obs. of 12 variables:
## $ wes_id : Factor w/ 480 levels "P1_A01","P1_A02",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ es_ev1 : num -0.1554 -0.131 -0.0053 -0.0044 0.0143 ...
## $ es_ev2 : num 0.079 0.0918 -0.0106 0.0003 -0.0315 -0.0194 -0.0059 0.0628 0.0354 0.0005 ...
## $ es_ev3 : num 0.0247 -0.0669 0.0075 0.0615 0.0098 ...
## $ es_ev4 : num 0.0032 -0.081 -0.0206 -0.0316 0.004 -0.0383 0.0881 0.0054 -0.0177 0.0067 ...
## $ es_ev5 : num 0.0429 -0.0496 -0.0072 -0.0279 0.0081 0.011 0.0614 -0.0276 0.0493 0.0063 ...
## $ es_ev6 : num 0.0002 -0.0156 0.0395 -0.0229 -0.017 0.033 0.0252 0.0596 -0.0364 0.0333 ...
## $ es_ev7 : num -0.0413 -0.0364 -0.0448 -0.0363 0.0462 0.0271 -0.0081 -0.023 -0.0316 0.0481 ...
## $ es_ev8 : num -0.0956 -0.0423 0.0245 0.0437 -0.0032 0.0554 -0.0043 0.0377 0.0297 -0.0044 ...
## $ es_ev9 : num -0.0341 0.0594 -0.042 -0.0561 0.0159 -0.0531 -0.0217 -0.0393 -0.0528 -0.0355 ...
## $ es_ev10: num -0.0423 0.001 -0.0174 -0.0268 -0.0205 0.0586 0.0569 -0.0136 -0.0103 -0.0204 ...
## $ group : Factor w/ 2 levels "CBC","UBC": 1 1 1 1 1 1 2 1 1 2 ...
# Common sence checks (using properties of eigenvectors)
apply(es_evectors.df[,2:11],2,sum) # should be close to zeros
## es_ev1 es_ev2 es_ev3 es_ev4 es_ev5 es_ev6 es_ev7 es_ev8 es_ev9
## -0.0001 0.0003 0.0001 -0.0003 -0.0001 0.0010 -0.0004 -0.0008 0.0015
## es_ev10
## 0.0002
apply(es_evectors.df[,2:11],2,function(x){sum(x^2)}) # should be close to ones
## es_ev1 es_ev2 es_ev3 es_ev4 es_ev5 es_ev6 es_ev7
## 1.0000400 1.0000222 1.0000036 0.9999435 1.0000308 0.9999554 1.0000354
## es_ev8 es_ev9 es_ev10
## 0.9998510 1.0000525 1.0000621
cor.test(as.numeric(es_evectors.df$es_ev1), as.numeric(es_evectors.df$es_ev2)) # should be orthogonal
##
## Pearson's product-moment correlation
##
## data: as.numeric(es_evectors.df$es_ev1) and as.numeric(es_evectors.df$es_ev2)
## t = 0.00038719, df = 478, p-value = 0.9997
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.08948289 0.08951802
## sample estimates:
## cor
## 1.770951e-05
apply(common_es_evectors.df[,2:11],2,sum) # should be close to zeros
## es_ev1 es_ev2 es_ev3 es_ev4 es_ev5 es_ev6 es_ev7 es_ev8 es_ev9
## 0.0002 0.0008 -0.0001 -0.0002 0.0003 0.0001 -0.0009 0.0001 -0.0012
## es_ev10
## 0.0010
apply(common_es_evectors.df[,2:11],2,function(x){sum(x^2)}) # should be close to ones
## es_ev1 es_ev2 es_ev3 es_ev4 es_ev5 es_ev6 es_ev7
## 1.0000680 1.0000513 0.9999710 0.9999654 0.9999981 0.9999769 0.9999082
## es_ev8 es_ev9 es_ev10
## 0.9999181 1.0000216 1.0000522
cor.test(as.numeric(common_es_evectors.df$es_ev1), as.numeric(common_es_evectors.df$es_ev2)) # should be orthogonal
##
## Pearson's product-moment correlation
##
## data: as.numeric(common_es_evectors.df$es_ev1) and as.numeric(common_es_evectors.df$es_ev2)
## t = 0.00061781, df = 478, p-value = 0.9995
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.08947242 0.08952849
## sample estimates:
## cor
## 2.825798e-05
# --- eigenvalues --- #
es_evalues_file <- "/scratch/medgen/scripts/wecare_stat_01.17/interim_data/u01_wecare_all_variants_eigenstrat_no_outliers/u01_wecare_all_variants_eigenstrat_no_outliers.eval"
common_es_evalues_file <- "/scratch/medgen/scripts/wecare_stat_01.17/interim_data/u01_wecare_common_variants_eigenstrat_no_outliers/u01_wecare_common_variants_eigenstrat_no_outliers.eval"
es_evalues.df <- read.table(es_evalues_file)
common_es_evalues.df <- read.table(common_es_evalues_file)
colnames(es_evalues.df) <- "evals"
str(es_evalues.df)
## 'data.frame': 480 obs. of 1 variable:
## $ evals: num 2.78 2.41 2.3 2.06 2.03 ...
plot(es_evalues.df$evals, main="Eigenvalues (WECARE, all variants, EIGENSTRAT)")
colnames(common_es_evalues.df) <- "evals"
str(common_es_evalues.df)
## 'data.frame': 480 obs. of 1 variable:
## $ evals: num 2.62 2.1 1.92 1.84 1.65 ...
plot(common_es_evalues.df$evals, main="Eigenvalues (WECARE, common variants, EIGENSTRAT)")
# --- Clean-up --- #
rm(es_evectors_file, es_evalues_file, common_es_evectors_file, common_es_evalues_file)
# All variants
plot(es_evalues.df$evals, r_evalues.df$evals, main="eigenvalues: R vs EIGENSTRAT (all variants)")
abline(c(0,0),c(1,1), col="red")
# Common variants
plot(common_es_evalues.df$evals, common_r_evalues.df$evals, main="eigenvalues: R vs EIGENSTRAT (common variants)")
abline(c(0,0),c(1,1), col="red")
# Prepare dataframe
joined_evectors.df <- cbind(es_evectors.df, r_evectors.df)
# Prepare colour scale
colours <- c("UBC" = "BLUE", "CBC" = "RED")
userColourScale <- scale_colour_manual(values=colours)
# --- 1st eigenvectors --- #
g <- ggplot(joined_evectors.df, aes(es_ev1, r_ev1)) +
geom_point(aes(colour=group, fill=group, text=wes_id)) +
labs(title="1st eigenvectors: R vs EIGENSTRAT<br>WECARE, all variants") +
geom_abline(colour = "brown", linetype = 2) +
userColourScale
## Warning: Ignoring unknown aesthetics: text
ggplotly(g)
cor.test(as.numeric(joined_evectors.df$es_ev1), as.numeric(joined_evectors.df$r_ev1))
##
## Pearson's product-moment correlation
##
## data: as.numeric(joined_evectors.df$es_ev1) and as.numeric(joined_evectors.df$r_ev1)
## t = 326.06, df = 478, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9973196 0.9981272
## sample estimates:
## cor
## 0.9977595
# --- 2nd eigenvectors --- #
g <- ggplot(joined_evectors.df, aes(es_ev2, r_ev2)) +
geom_point(aes(colour=group, fill=group, text=wes_id)) +
labs(title="2nd eigenvectors: R vs EIGENSTRAT<br>WECARE, all variants") +
geom_abline(colour = "brown", linetype = 2) +
userColourScale
## Warning: Ignoring unknown aesthetics: text
ggplotly(g)
cor.test(as.numeric(joined_evectors.df$es_ev2), as.numeric(joined_evectors.df$r_ev2))
##
## Pearson's product-moment correlation
##
## data: as.numeric(joined_evectors.df$es_ev2) and as.numeric(joined_evectors.df$r_ev2)
## t = 321.43, df = 478, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9972421 0.9980731
## sample estimates:
## cor
## 0.9976947
# --- 3rd eigenvectors --- #
g <- ggplot(joined_evectors.df, aes(es_ev3, r_ev3)) +
geom_point(aes(colour=group, fill=group, text=wes_id)) +
labs(title="3rd eigenvectors: R vs EIGENSTRAT<br>WECARE, all variants") +
geom_abline(slope = -1, colour = "brown", linetype = 2) +
userColourScale
## Warning: Ignoring unknown aesthetics: text
ggplotly(g)
cor.test(as.numeric(joined_evectors.df$es_ev3), as.numeric(joined_evectors.df$r_ev3))
##
## Pearson's product-moment correlation
##
## data: as.numeric(joined_evectors.df$es_ev3) and as.numeric(joined_evectors.df$r_ev3)
## t = -688.86, df = 478, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.9995794 -0.9993978
## sample estimates:
## cor
## -0.9994967
# Clean-up
rm(joined_evectors.df)
# Prepare dataframe
common_joined_evectors.df <- cbind(common_es_evectors.df, common_r_evectors.df)
# --- 1st eigenvectors --- #
g <- ggplot(common_joined_evectors.df, aes(es_ev1, r_ev1)) +
geom_point(aes(colour=group, fill=group, text=wes_id)) +
labs(title="1st eigenvectors: R vs EIGENSTRAT<br>WECARE, common variants") +
geom_abline(colour = "brown", linetype = 2) +
userColourScale
## Warning: Ignoring unknown aesthetics: text
ggplotly(g)
cor.test(as.numeric(common_joined_evectors.df$es_ev1), as.numeric(common_joined_evectors.df$r_ev1))
##
## Pearson's product-moment correlation
##
## data: as.numeric(common_joined_evectors.df$es_ev1) and as.numeric(common_joined_evectors.df$r_ev1)
## t = 631.27, df = 478, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9992830 0.9994992
## sample estimates:
## cor
## 0.9994008
# --- 2nd eigenvectors --- #
g <- ggplot(common_joined_evectors.df, aes(es_ev2, r_ev2)) +
geom_point(aes(colour=group, fill=group, text=wes_id)) +
labs(title="2nd eigenvectors: R vs EIGENSTRAT<br>WECARE, common variants") +
geom_abline(colour = "brown", linetype = 2) +
userColourScale
## Warning: Ignoring unknown aesthetics: text
ggplotly(g)
cor.test(as.numeric(common_joined_evectors.df$es_ev2), as.numeric(common_joined_evectors.df$r_ev2))
##
## Pearson's product-moment correlation
##
## data: as.numeric(common_joined_evectors.df$es_ev2) and as.numeric(common_joined_evectors.df$r_ev2)
## t = 252.59, df = 478, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9955443 0.9968860
## sample estimates:
## cor
## 0.996275
# --- 3rd eigenvectors --- #
g <- ggplot(common_joined_evectors.df, aes(es_ev3, r_ev3)) +
geom_point(aes(colour=group, fill=group, text=wes_id)) +
labs(title="3rd eigenvectors: R vs EIGENSTRAT<br>WECARE, common variants") +
geom_abline(slope = -1, colour = "brown", linetype = 2) +
userColourScale
## Warning: Ignoring unknown aesthetics: text
ggplotly(g)
cor.test(as.numeric(common_joined_evectors.df$es_ev3), as.numeric(common_joined_evectors.df$r_ev3))
##
## Pearson's product-moment correlation
##
## data: as.numeric(common_joined_evectors.df$es_ev3) and as.numeric(common_joined_evectors.df$r_ev3)
## t = -268.6, df = 478, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.9972446 -0.9960570
## sample estimates:
## cor
## -0.9967038
# Clean-up
rm(common_joined_evectors.df)
# Prepare plot data
data2plot.df <- as.data.frame(cbind(
es_evectors.df$es_ev1,
es_evectors.df$es_ev2,
es_evectors.df$es_ev3,
wecare_phenotypes.df$eig1_gwas,
wecare_phenotypes.df$eig2_gwas,
wecare_phenotypes.df$eig3_gwas,
wecare_phenotypes.df$cc))
sum(es_evectors.df$wes_id != wecare_phenotypes.df$wes_id)
## [1] 0
data2plot.df <- cbind(es_evectors.df$wes_id, data2plot.df)
colnames(data2plot.df) <- c("wes_id", "wes_ev1", "wes_ev2", "wes_ev3",
"gwas_ev1", "gwas_ev2", "gwas_ev3", "group")
"UBC" -> data2plot.df[data2plot.df$group == 0, "group"] # "UBC" = 0 = "BLUE"
"CBC" -> data2plot.df[data2plot.df$group == 1, "group"] # "CBC" = 1 = "RED"
data2plot.df$group <- as.factor(data2plot.df$group)
str(data2plot.df)
## 'data.frame': 480 obs. of 8 variables:
## $ wes_id : Factor w/ 480 levels "P1_A01","P1_A02",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ wes_ev1 : num 0.0157 0.0146 -0.0049 -0.0081 -0.002 -0.0064 -0.0087 -0.011 0.0283 -0.0077 ...
## $ wes_ev2 : num -0.0558 -0.0315 0.0022 0.0014 0.0078 0.0066 0.0037 0.0073 -0.0578 0.0053 ...
## $ wes_ev3 : num -0.0085 -0.0083 0.0002 -0.0072 -0.0017 -0.0026 -0.0034 -0.0008 -0.006 -0.0066 ...
## $ gwas_ev1: num -0.00389 -0.00379 -0.01011 -0.01288 -0.01086 ...
## $ gwas_ev2: num 0.00266 0.00246 -0.0001 0.00595 0.01157 ...
## $ gwas_ev3: num 0.06803 0.05055 -0.00603 0.00747 0.00144 ...
## $ group : Factor w/ 2 levels "CBC","UBC": 1 1 1 1 1 1 2 1 1 2 ...
# 1st eigenvectors
g <- ggplot(data2plot.df, aes(wes_ev1, gwas_ev1)) +
geom_point(aes(colour=group, text=wes_id)) +
labs(title="1st eigenvectors: WES EIGENSTRAT vs GWAS<br>WECARE, all variants") +
geom_abline(slope = 1, colour = "brown", linetype = 2) +
userColourScale
## Warning: Ignoring unknown aesthetics: text
ggplotly(g)
cor.test(es_evectors.df$es_ev1, wecare_phenotypes.df$eig1_gwas)
##
## Pearson's product-moment correlation
##
## data: es_evectors.df$es_ev1 and wecare_phenotypes.df$eig1_gwas
## t = 15.385, df = 478, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5123857 0.6324210
## sample estimates:
## cor
## 0.5754946
# 2nd eigenvectors
g <- ggplot(data2plot.df, aes(wes_ev2, gwas_ev2)) +
geom_point(aes(colour=group, text=wes_id)) +
labs(title="2nd eigenvectors: WES EIGENSTRAT vs GWAS<br>WECARE, all variants") +
geom_abline(slope = -1, colour = "brown", linetype = 2) +
userColourScale
## Warning: Ignoring unknown aesthetics: text
ggplotly(g)
cor.test(es_evectors.df$es_ev2, wecare_phenotypes.df$eig2_gwas)
##
## Pearson's product-moment correlation
##
## data: es_evectors.df$es_ev2 and wecare_phenotypes.df$eig2_gwas
## t = -1.9772, df = 478, p-value = 0.04859
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1781314858 -0.0005711037
## sample estimates:
## cor
## -0.09006695
# 3rd eigenvectors
g <- ggplot(data2plot.df, aes(wes_ev3, gwas_ev3)) +
geom_point(aes(colour=group, text=wes_id)) +
labs(title="3rd eigenvectors: WES EIGENSTRAT vs GWAS<br>WECARE, all variants") +
geom_abline(slope = -1, colour = "brown", linetype = 2) +
userColourScale
## Warning: Ignoring unknown aesthetics: text
ggplotly(g)
cor.test(es_evectors.df$es_ev3, wecare_phenotypes.df$eig3_gwas)
##
## Pearson's product-moment correlation
##
## data: es_evectors.df$es_ev3 and wecare_phenotypes.df$eig3_gwas
## t = -1.6498, df = 478, p-value = 0.09964
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.16364520 0.01435026
## sample estimates:
## cor
## -0.07524684
# Clean-up
rm(data2plot.df, g)
# Prepare plot data
data2plot.df <- as.data.frame(cbind(
common_es_evectors.df$es_ev1,
common_es_evectors.df$es_ev2,
common_es_evectors.df$es_ev3,
wecare_phenotypes.df$eig1_gwas,
wecare_phenotypes.df$eig2_gwas,
wecare_phenotypes.df$eig3_gwas,
wecare_phenotypes.df$cc))
data2plot.df <- cbind(es_evectors.df$wes_id, data2plot.df)
colnames(data2plot.df) <- c("wes_id", "wes_ev1", "wes_ev2", "wes_ev3",
"gwas_ev1", "gwas_ev2", "gwas_ev3", "group")
"UBC" -> data2plot.df[data2plot.df$group == 0, "group"] # "UBC" = 0 = "BLUE"
"CBC" -> data2plot.df[data2plot.df$group == 1, "group"] # "CBC" = 1 = "RED"
data2plot.df$group <- as.factor(data2plot.df$group)
str(data2plot.df)
## 'data.frame': 480 obs. of 8 variables:
## $ wes_id : Factor w/ 480 levels "P1_A01","P1_A02",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ wes_ev1 : num -0.1554 -0.131 -0.0053 -0.0044 0.0143 ...
## $ wes_ev2 : num 0.079 0.0918 -0.0106 0.0003 -0.0315 -0.0194 -0.0059 0.0628 0.0354 0.0005 ...
## $ wes_ev3 : num 0.0247 -0.0669 0.0075 0.0615 0.0098 ...
## $ gwas_ev1: num -0.00389 -0.00379 -0.01011 -0.01288 -0.01086 ...
## $ gwas_ev2: num 0.00266 0.00246 -0.0001 0.00595 0.01157 ...
## $ gwas_ev3: num 0.06803 0.05055 -0.00603 0.00747 0.00144 ...
## $ group : Factor w/ 2 levels "CBC","UBC": 1 1 1 1 1 1 2 1 1 2 ...
# 1st eigenvectors
g <- ggplot(data2plot.df, aes(wes_ev1, gwas_ev1)) +
geom_point(aes(colour=group, text=wes_id)) +
labs(title="1st eigenvectors: WES EIGENSTRAT vs GWAS<br>WECARE, common variants") +
geom_abline(slope = -1, colour = "brown", linetype = 2) +
userColourScale
## Warning: Ignoring unknown aesthetics: text
ggplotly(g)
cor.test(common_es_evectors.df$es_ev1, wecare_phenotypes.df$eig1_gwas)
##
## Pearson's product-moment correlation
##
## data: common_es_evectors.df$es_ev1 and wecare_phenotypes.df$eig1_gwas
## t = -22.552, df = 478, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.7587342 -0.6716505
## sample estimates:
## cor
## -0.7179903
# 2nd eigenvectors
g <- ggplot(data2plot.df, aes(wes_ev2, gwas_ev2)) +
geom_point(aes(colour=group, text=wes_id)) +
labs(title="2nd eigenvectors: WES EIGENSTRAT vs GWAS<br>WECARE, common variants") +
geom_abline(slope = 1, colour = "brown", linetype = 2) +
userColourScale
## Warning: Ignoring unknown aesthetics: text
ggplotly(g)
cor.test(common_es_evectors.df$es_ev2, wecare_phenotypes.df$eig2_gwas)
##
## Pearson's product-moment correlation
##
## data: common_es_evectors.df$es_ev2 and wecare_phenotypes.df$eig2_gwas
## t = 12.97, df = 478, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4408298 0.5735126
## sample estimates:
## cor
## 0.5102006
# 3rd eigenvectors
g <- ggplot(data2plot.df, aes(wes_ev3, gwas_ev3)) +
geom_point(aes(colour=group, text=wes_id)) +
labs(title="3rd eigenvectors: WES EIGENSTRAT vs GWAS<br>WECARE, all variants") +
geom_abline(slope = 1, colour = "brown", linetype = 2) +
userColourScale
## Warning: Ignoring unknown aesthetics: text
ggplotly(g)
cor.test(common_es_evectors.df$es_ev3, wecare_phenotypes.df$eig3_gwas)
##
## Pearson's product-moment correlation
##
## data: common_es_evectors.df$es_ev3 and wecare_phenotypes.df$eig3_gwas
## t = 1.829, df = 478, p-value = 0.06802
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.006180988 0.171585793
## sample estimates:
## cor
## 0.08336558
# Clean-up
rm(data2plot.df, g, colours, userColourScale)
save.image(paste(interim_data_folder, "r05d_compare_egenvectors_wecare_only_jan2017.RData", sep="/"))
ls()
## [1] "common_es_evalues.df" "common_es_evectors.df"
## [3] "common_r_evalues.df" "common_r_evectors.df"
## [5] "common_wecare.eigen" "es_evalues.df"
## [7] "es_evectors.df" "interim_data_folder"
## [9] "r_evalues.df" "r_evectors.df"
## [11] "wecare_phenotypes.df" "wecare.eigen"
sessionInfo()
## R version 3.2.3 (2015-12-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Scientific Linux release 6.8 (Carbon)
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] plotly_4.5.6 ggplot2_2.2.1 dplyr_0.5.0
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.12.6 knitr_1.13 magrittr_1.5
## [4] munsell_0.4.3 viridisLite_0.1.3 colorspace_1.2-6
## [7] R6_2.1.2 httr_1.2.1 stringr_1.0.0
## [10] plyr_1.8.4 tools_3.2.3 grid_3.2.3
## [13] gtable_0.2.0 DBI_0.5 htmltools_0.3.5
## [16] yaml_2.1.13 lazyeval_0.2.0 assertthat_0.1
## [19] digest_0.6.10 tibble_1.1 tidyr_0.5.1
## [22] purrr_0.2.2 formatR_1.4 base64enc_0.1-3
## [25] htmlwidgets_0.8 evaluate_0.9 rmarkdown_1.0
## [28] labeling_0.3 stringi_1.1.1 scales_0.4.1
## [31] jsonlite_1.0
Sys.time()
## [1] "2017-03-01 20:52:04 GMT"